package com.company.project.utils;


import com.company.project.config.Environment;
import com.company.project.entity.Applicant;
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.util.PDFTextStripperByArea;


import java.awt.*;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

@Slf4j
public class ReadLineUtils {

    public static List<String> getContent(String orgPath) {
        //新建一个List<String>类型的集合，将后面从pdf抓取的内容放入该集合
        List<String> contentAll = new ArrayList<String>();
        try {
            //加载pdf文件
            PDDocument document = PDDocument.load(new File(orgPath));
            try {
                String data = "";
                if (document.isEncrypted()) {
                    document.decrypt("");
                }
                //创建pdf内容的获取器
                PDFTextStripperByArea stripper = new PDFTextStripperByArea();
                stripper.setSortByPosition(true);
                //划定区域，在该区域内获取pdf的内容
                Rectangle rect = new Rectangle(0, 0, 10000, 10000);
                stripper.addRegion("area", rect);
                //PDPage表示pdf文件的一页，这里获取了pdf文档的每页的集合
                List<PDPage> allPages = document.getDocumentCatalog().getAllPages();
                //下面对每页的数据按照上面划定的区域，提取区域内每行的文本内容，并加入到contentAll里面
                for (PDPage page : allPages) {
                    stripper.extractRegions(page);
                    //获取区域的text
                    data = stripper.getTextForRegion("area");
                    System.out.println(data);
                    String[] datas = data.split("\r\n");
                    //对文本进行分行处理
                    for (String s : datas) {
                        contentAll.add(s);
                    }
                }
            } catch (Exception e) {
                log.error("文件内容读取失败", orgPath);
            } finally {
                document.close();
            }
        } catch (IOException ioException) {
            log.error("文件装载失败");
        }
        return contentAll;
    }

    //获取申请人信息
    public static Applicant getApplicantInfoFromContent(File srcFile) {
        Applicant applicant = new Applicant();
        try {
            PDDocument document = PDDocument.load(srcFile);
            try {
                String data = "";
                if (document.isEncrypted()) {
                    document.decrypt("");
                }
                PDFTextStripperByArea stripper = new PDFTextStripperByArea();
                stripper.setSortByPosition(true);

//                String filename = PDF2ImageUtils.extractImages(srcFile, "F:\\download\\用户资料"+File.separator+"images",0);
                String filename = PDF2ImageUtils.extractImages(srcFile, Environment.accountFilePath + File.separator + "images", 0);
                String content = ZXingUtils.decodeImg(new File(filename));

                //划定区域
                Rectangle rect = new Rectangle(0, 0, 10000, 10000);
                stripper.addRegion("area", rect);
                List<PDPage> allPages = document.getDocumentCatalog().getAllPages();
                String appDate;
                String dataStr;
                String tmp;
                String className = null;
                for (PDPage page : allPages) {
                    stripper.extractRegions(page);
                    //获取区域的text
                    data = stripper.getTextForRegion("area");
                    String[] datas = data.split("\r\n");
                    //对文本进行分行处理
                    for (int i = 0; i < datas.length; i++) {
                        dataStr = datas[i];
                        //获取冒号后面的内容，进行匹配并封装到实体类中
                        if (dataStr.indexOf(":") >= 0 || dataStr.indexOf("：") >= 0) {
                            //如果使用的是中文符号也要进行判断，不排除使用中文符号的情况,读取冒号后面的内容
                            if (isContainChinese("：")) {
                                tmp = dataStr.substring(dataStr.indexOf("：") + 1).trim();
                            } else {
                                tmp = dataStr.substring(dataStr.indexOf(":") + 1).trim();
                            }
                            if (dataStr.indexOf("姓名") >= 0) {
                                if (!tmp.equals("") && isContainChinese(tmp)) {
                                    /*if (tmp.indexOf("(") >= 0) {
                                        tmp = tmp.substring(0, tmp.indexOf("(")).trim();
                                    }
                                    if (tmp.indexOf("（") >= 0) {
                                        tmp = tmp.substring(0, tmp.indexOf("（")).trim();
                                    }*/
                                    String[] names = tmp.split(" ");
                                    if (names[0].length()<2){
                                        applicant.setName(names[1]);
                                    }
                                    applicant.setName(names[0]);
                                }
                            } else if (dataStr.indexOf("联系方式") >= 0) {
                                if (!tmp.equals("") && isContainChinese(tmp)) {
                                    String matcherStr1 = MatcherAssistUtils.getMatcherStr(tmp, "^1[3|4|5|8][0-9]\\d{4,8}$");
                                    if (matcherStr1==null){
                                        //查看联系方式是否在下一行
                                        tmp = getStringBehindColon(datas[i+1]);
                                        String matcherStr2 = MatcherAssistUtils.getMatcherStr(tmp, "^1[3|4|5|8][0-9]\\d{4,8}$").trim();
                                        applicant.setPhone(matcherStr2);
                                    }
                                    applicant.setPhone(matcherStr1);
                                }
                            } else if (dataStr.indexOf("电话") >= 0) {
                                if (!dataStr.equals("")) {

                                    applicant.setPhone(tmp);
                                }
                            } else if (dataStr.indexOf("邮箱") >= 0) {
                                if (!tmp.equals("") && isContainChinese(tmp)) {
                                    applicant.setEmail(tmp);
                                }
                            } else if (dataStr.indexOf("手机") >= 0){
                                /*if (!tmp.equals("") && isContainChinese(tmp)) {
                                    applicant.setPhone(tmp);
                                }*/
                                String matcherStr1 = MatcherAssistUtils.getMatcherStr(tmp, "^[1]([3-9])[0-9]{9}$");
                                if (matcherStr1==null || "".equals(matcherStr1)){
                                    //查看联系方式是否在下一行
                                    tmp = getStringBehindColon(datas[i+1]);
                                    String matcherStr2 = MatcherAssistUtils.getMatcherStr(tmp, "^[1]([3-9])[0-9]{9}$").trim();
                                    applicant.setPhone(matcherStr2);
                                }
                                applicant.setPhone(matcherStr1);
                            } else if (dataStr.indexOf("E-mail") >= 0){
                                if (!tmp.equals("") && isContainChinese(tmp)) {
                                    applicant.setEmail(tmp);
                                }
                            }
                        }
                    }
                }
            } catch (Exception e) {
                log.error("文件内容识别失败");
            } finally {
                document.close();
            }
        } catch (IOException ioException) {
            log.error("读取文件失败", srcFile.getName());
        }
        return applicant;
    }

    //获取回文信息
    public static Map<String, String> getContentSendContent(String srcFile) {
        Map<String, String> retMap = new HashMap<String, String>();
        String appName = null;
        String appNum = null;
        try {
            //获取标题
            Rectangle textRrecttitle = new Rectangle(0, 0, 580, 32);
            String strContenttitle = PdfboxUtils.readRectangelText(srcFile, 0, textRrecttitle).trim();

            Rectangle yztextRrecttitle = new Rectangle(0, 0, 580, 73);
            String yzstrContenttitle = PdfboxUtils.readRectangelText(srcFile, 0, yztextRrecttitle).trim();

            if (strContenttitle.equals("国家知识产权局")) {
                String data = "";
                appName = getApplyName(srcFile, 0);
                appNum = getAppNum(srcFile, 0);
            } else if (PdfboxUtils.fileSize(srcFile) > 1 && yzstrContenttitle.equals("引  证  商  标")) {
                Map<String, String> resultMap = getYZSBMap(srcFile);
                appName = resultMap.get("appName");
                appNum = resultMap.get("appNum");
            } else {
                //处理商标电子档pdf文件
                appName = getApplyRegName(srcFile, 0);
                if (null != appName && !appName.equals("")) {
                    appNum = getRegAppNum(srcFile, 0);
                } else {
                    appNum = getAppNum(srcFile, 0);
                }
            }
            retMap.put("appName", appName);
            retMap.put("appNum", appNum);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return retMap;
    }

    /**
     *  获取冒号后面的内容
     * @param string
     * @return
     */
    private static String getStringBehindColon(String string){
        if (isContainChinese("：")) {
             return string.substring(string.indexOf("：") + 1).trim();
        } else {
            return string.substring(string.indexOf(":") + 1).trim();
        }
    }

    /**
     * 获取用户资料申请号
     * @param content
     * @return
     */
    private static String getUserInfoAppNum(String content) {
        //匹配申请号
        String regx = "[A-Z]{4}[0-9]{14,16}";
        String regx2 = "[1-9][0-9]*";
        Pattern p1 = Pattern.compile(regx);
        Pattern p2 = Pattern.compile(regx2);
        Matcher m = p1.matcher(content);
        String appnun = MatcherAssistUtils.getAppNum(m);
        String title = appnun.substring(0, 4);
        m = p2.matcher(appnun);
        appnun = MatcherAssistUtils.getAppNum(m);
        return title + appnun.trim();
    }


    /**
     * 获取申请号
     *
     * @param srcFile
     * @param ipage
     * @return
     */
    private static String getRegAppNum(String srcFile, int ipage) {

        //匹配申请号
        String reg1 = "第\\s?(\\d+)[A-Z]\\s?号";
        String reg2 = "[0-9]{6,15}[A-Z]?";
        Pattern p1 = Pattern.compile(reg1);
        Pattern p2 = Pattern.compile(reg2);

        Rectangle textRrect = new Rectangle(0, 0, 572, 114);
        String strContent = PdfboxUtils.readRectangelText(srcFile, ipage, textRrect);
        strContent = strContent.replaceAll("\r\n", "");
        Matcher m = p1.matcher(strContent);
        String appnun = MatcherAssistUtils.getAppNum(m);
        if (null != appnun && !appnun.equals("")) {
            m = p2.matcher(appnun);
            appnun = MatcherAssistUtils.getAppNum(m);
        }
        return appnun.trim();
    }

    /**
     * 获取申请号
     *
     * @param srcFile
     * @param ipage
     * @return
     */
    private static String getAppNum(String srcFile, int ipage) {

        //匹配申请号
        String regx = "^(注册申请号|转让申请号|转让申请号|变更申请号|备案号)+(:|：)?\\s?(\\d+)$";
        String reg2 = "申请号[:|：| ]\\s*[0-9]*";
        String reg3 = "第\\s?(\\d+)\\s?号";
        String reg4 = "[0-9]{6,15}";
        Pattern p1 = Pattern.compile(regx);
        Pattern p2 = Pattern.compile(reg2);
        Pattern p3 = Pattern.compile(reg3);
        Pattern p4 = Pattern.compile(reg4);

        Rectangle textRrect = new Rectangle(0, 180, 10000, 10000);
        String strContent = PdfboxUtils.readRectangelText(srcFile, ipage, textRrect);
        strContent = strContent.replaceAll("\r\n", "");
        Matcher m = p1.matcher(strContent);
        String appnun = MatcherAssistUtils.getAppNum(m);
        if (null == appnun && appnun.equals("")) {
            m = p4.matcher(strContent);
            appnun = MatcherAssistUtils.getAppNum(m);
        } else {
            //匹配申请号
            m = p2.matcher(strContent);
            appnun = MatcherAssistUtils.getAppNum(m);
            if (null != appnun && !appnun.equals("")) {
                m = p4.matcher(appnun);
                appnun = MatcherAssistUtils.getAppNum(m);
            } else {
                //匹配第***号
                m = p3.matcher(strContent);
                appnun = MatcherAssistUtils.getAppNum(m);
                if (null != appnun && !appnun.equals("")) {
                    m = p4.matcher(appnun);
                    appnun = MatcherAssistUtils.getAppNum(m);
                }
            }
        }
        return appnun.trim();
    }


    /**
     * 对引证明商标进行处理
     *
     * @param srcFile
     * @return
     */
    public static Map<String, String> getYZSBMap(String srcFile) {
        Map<String, String> retMap = new HashMap<String, String>();
        String appName = "";
        String appNum = "";
        try {
            PDDocument document = PDDocument.load(srcFile);
            int size = document.getDocumentCatalog().getAllPages().size();
            String title = "";
            for (int i = 0; i < size; i++) {
                title = getYZSBTitle(srcFile, i);
                if (!title.equals("引  证  商  标")) {
                    appName = getApplyName(srcFile, i);
                    appNum = getAppNum(srcFile, i);
                    break;
                }
            }
        } catch (Exception ex) {
            log.error("引证商标处理失败");
        }
        retMap.put("appNum", appNum);
        retMap.put("appName", appName);
        return retMap;
    }

    public static String getYZSBTitle(String srcFile, int ipage) {
        Rectangle textRrecttitle = new Rectangle(0, 0, 580, 73);
        String strContenttitle = PdfboxUtils.readRectangelText(srcFile, ipage, textRrecttitle).trim();
        return strContenttitle;
    }

    /**
     * 获取申请人名字
     *
     * @param srcFile
     * @param ipage
     * @return
     */
    private static String getApplyName(String srcFile, int ipage) {
        Rectangle textRrect = new Rectangle(18, 61, 331, 160);
        String strContent = PdfboxUtils.readRectangelText(srcFile, ipage, textRrect);
        String[] datas = strContent.split("\r\n");
        return datas[datas.length - 1].trim();
    }


    /**
     * 获取商标注册人名字
     *
     * @param srcFile
     * @param ipage
     * @return
     */
    private static String getApplyRegName(String srcFile, int ipage) {
        Rectangle textRrect = new Rectangle(0, 500, 579, 510);
        String strContent = PdfboxUtils.readRectangelText(srcFile, ipage, textRrect);
        String[] datas = strContent.split("\r\n");
        String appName = "";
        for (int i = 0; i < datas.length; i++) {
            if (datas[i].startsWith("注　册　人")) {
                appName = datas[i].replaceAll("注　册　人", "").trim();
                break;
            }
        }
        return appName;
    }


    /**
     * 判断字符是否是中文，能校验是否为中文标点符号
     *
     * @param str 待校验字符
     * @return 是否为中文
     */
    public static boolean isContainChinese(String str) {
        // 中文字
        Pattern p = Pattern.compile("[\u4e00-\u9fa5]");
        Matcher m = p.matcher(String.valueOf(str));
        if (m.find()) {
            return true;
        }
        // 中文标点符号
        p = Pattern.compile("[\uFF01]|[\uFF0C-\uFF0E]|[\uFF1A-\uFF1B]|[\uFF1F]|[\uFF08-\uFF09]|[\u3001-\u3002]|[\u3010-\u3011]|[\u201C-\u201D]|[\u2013-\u2014]|[\u2018-\u2019]|[\u2026]|[\u3008-\u300F]|[\u3014-\u3015]");
        m = p.matcher(String.valueOf(str));
        return m.find();
    }

    /**
     * 判断特殊符号
     *
     * @param str
     * @return
     */
    public static boolean isContainChineseSymbol(String str) {
        Pattern p = Pattern.compile("[\uFF01]|[\uFF0C-\uFF0E]|[\uFF1A-\uFF1B]|[\uFF1F]|[\uFF08-\uFF09]|[\u3001-\u3002]|[\u3010-\u3011]|[\u201C-\u201D]|[\u2013-\u2014]|[\u2018-\u2019]|[\u2026]|[\u3008-\u300F]|[\u3014-\u3015]");
        Matcher m = p.matcher(String.valueOf(str));
        return m.find();
    }


    public static void main(String[] args) throws Exception {

        Map<String, String> retMap = getContentSendContent("F:\\download\\发文\\1600236186178810.pdf");
//
//        String appName = retMap.get("appName");
//        String appNum = retMap.get("appNum");

//        ClUserInfo clUserInfo = getContentClUsersInfo(new File("F:\\download\\用户资料\\1600235965283725.pdf"));
//        ClUserInfo clUserInfo = getContentClUsersInfo(new File("F:\\download\\用户资料\\1600235064491637.pdf"));

//        System.out.println(clUserInfo.toString());

    }
}
